Data preparation

First we define the variables which will be present in the dataset

# Define variables
devices = c("Samsung", "Nokia")
n_devices = length(devices)
sizes = c("small", "medium", "large")
n_sizes = length(sizes)
n_repetitions = 10
repetitions = 0:(n_repetitions-1)
message_types = c("text", "image", "video", "audio", "document")
n_message_types = length(message_types)
apps = c("Telegram", "WhatsApp", "Messenger")
n_apps = length(apps)
n = n_devices * n_sizes * n_repetitions * n_message_types * n_apps

Load Data

Then we load the dataset from a csv file, clean it and separate the network data (which is aggregated over message types) from the other data which is not aggregated.

# Determine which columns to select
col_selection = c("device", "app_name", "size_name", "repetition", "message_type", "time_sec", "cpu", "memory", "energy_simple_J", "tcp_no_packets", "tcp_size_packets_MB", "udp_no_packets", "udp_size_packets_MB")
# Load data
data = read.table(data_path, sep=",", header=TRUE)
# Select only 900 rows and wanted columns
data = data[1:900, col_selection]
# Rename several columns
colnames(data) = c("device", "app", "size", "repetition", "message_type", "time_sec", "cpu", "memory", "energy", "tcp_no", "tcp_size", "udp_no", "udp_size")
# Make energy values positive
data$energy = -1 * data$energy
# Reorder levels
data$size = factor(data$size, levels=sizes)
data$app = factor(data$app, levels=apps)
data$message_type = factor(data$message_type, levels=message_types)

# Split into regular and network data
network_data = data
# Clean regular data
regular_col_selection = c("device", "app", "size", "repetition", "message_type", "time_sec", "cpu", "memory", "energy")
data = data[, regular_col_selection]
# Clean network data
network_col_selection = c("device", "app", "size", "repetition", "time_sec", "energy", "tcp_no", "tcp_size", "udp_no", "udp_size")
network_data = network_data[, network_col_selection]
for(device in devices) {
  for(app in apps) {
    for(size in sizes) {
      for(repetition in repetitions) {
        slice = network_data[network_data$device==device & network_data$app==app & network_data$size==size & network_data$repetition==repetition,]
        row_name = rownames(slice)[1]
        network_data[row_name,"time_sec"] = sum(slice$time_sec)
        network_data[row_name,"energy"] = sum(slice$energy)
      }
    }
  }
}
# Remove NA values
network_data = na.omit(network_data) 

Data Exploration

Then we begin exploring the data

Metrics

We start by exploring the different metrics: the dependent variable (energy consumption) as well as the support metrics (Memory, CPU and TDP & UDP)

# Define metrics
# Energy, CPU and Memory
metrics = c("energy", "cpu", "memory")
metric_titles = c("Energy", "CPU", "Memory")
metric_labels = c("Energy (Joule)", "CPU utilization (%)", "Memory usage (bytes)")
# Network metrics
network_metrics = c("tcp_no", "tcp_size", "udp_no", "udp_size")
network_metric_titles = c("TCP #packets", "TCP size", "UDP #packets", "UDP size")
network_metric_labels = c("TCP (number of packets)", "TCP size (MB)", "UDP (number of packets)", "UDP size (MB)")

Descriptive statistics

First we calculate descriptive statistics for the non-aggregated metrics

metric_summary = data %>%
  summarise(across(all_of(metrics), .fns = 
                     list(min = min,
                          median = median,
                          mean = mean,
                          sd = sd,
                          max = max))) %>%
  pivot_longer(everything(), names_sep='_', names_to=c('variable', '.value'))
metric_summary
## # A tibble: 3 × 6
##   variable      min   median     mean       sd      max
##   <chr>       <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 energy       4.43    104.     109.     33.9     264. 
## 2 cpu          1.6      14.4     15.0     5.62     60.8
## 3 memory   78327.   151277.  150540.  49075.   262608.

Then we calculate descriptive statistics for the aggregated metrics (network metrics)

network_summary = network_data %>%
  summarise(across(all_of(network_metrics), .fns = 
                     list(min = min,
                          median = median,
                          mean = mean,
                          sd = sd,
                          max = max))) %>%
  pivot_longer(everything(), names_pattern="(tcp_no|tcp_size|udp_no|udp_size)_(.*)", names_to=c('variable', '.value'))
network_summary
## # A tibble: 4 × 6
##   variable     min   median     mean       sd   max
##   <chr>      <dbl>    <dbl>    <dbl>    <dbl> <dbl>
## 1 tcp_no   151     3728.    4978.    2692.    16959
## 2 tcp_size   0.021    1        1.54     1.48     13
## 3 udp_no     2      169      295.     737.     9401
## 4 udp_size   0        0.074    0.173    0.692     9

Distributions and normality

For each metric we will:
(i) create a density plot
(ii) create a qq-plot
(iii) perform the Shapiro-Wilk test
To inspect the distribution of each metric and assess normality

plot_density = function(df, metric, title, label) {
  # Calculate mean and sd
  mean = mean(df[[metric]]) %>% round(digits=2)
  median = median(df[[metric]]) %>% round(digits=2)
  sd = sd(df[[metric]]) %>% round(digits=2)
  
  # Create a density plot
  return(ggplot(df, aes(x=.data[[metric]])) +
    geom_density() +
    theme_pubr() +
    stat_central_tendency(type = "mean", color = "red", linetype = "dashed") +
    stat_central_tendency(type = "median", color = "blue", linetype = "dashed") +
    # annotate("text", x = mean*1.01, y=0.1 , label = paste("Mean =", mean)) +
    geom_vline(xintercept=c(mean+sd, mean-sd), color = "red", linetype="dotted") +
    labs(title=title,
         x=label) + 
    theme(plot.title = element_text(hjust = 0.5)))
}

assess_normality = function(df, metric, title, label) {
  # Create a density plot
  print(plot_density(df, metric, title, label))
  ggsave(sprintf("%snormality/metrics/%s_density_plot.png", exploratory_figures_path, metric))
  # Create a QQ-plot
  png(sprintf("%snormality/metrics/%s_qq_plot.png", exploratory_figures_path, metric))
  qqPlot(df[[metric]], ylab=label, main=paste("QQ-plot of", title))
  dev.off()
  qqPlot(df[[metric]], ylab=label, main=paste("QQ-plot of", title))
  # Perform Shapiro-Wilk test
  shapiro_test = shapiro.test(df[[metric]])
  print(title)
  print(shapiro_test)
}

assess_normalities = function(df, metrics, titles, labels) {
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) & length(metrics) == length(labels))
  for(i in 1:length(metrics)) {
    assess_normality(df, metrics[i], titles[i], labels[i])
  }
}

# Assess normality for Energy, CPU and Memory
assess_normalities(data, metrics, metric_titles, metric_labels)
## Saving 7 x 5 in image

## [1] "Energy"
## 
##  Shapiro-Wilk normality test
## 
## data:  df[[metric]]
## W = 0.97065, p-value = 1.712e-12
## Saving 7 x 5 in image

## [1] "CPU"
## 
##  Shapiro-Wilk normality test
## 
## data:  df[[metric]]
## W = 0.90509, p-value < 2.2e-16
## Saving 7 x 5 in image

## [1] "Memory"
## 
##  Shapiro-Wilk normality test
## 
## data:  df[[metric]]
## W = 0.90574, p-value < 2.2e-16
# Assess normality for Network metrics
assess_normalities(network_data, network_metrics, network_metric_titles, network_metric_labels)
## Saving 7 x 5 in image

## [1] "TCP #packets"
## 
##  Shapiro-Wilk normality test
## 
## data:  df[[metric]]
## W = 0.81779, p-value = 1e-13
## Saving 7 x 5 in image

## [1] "TCP size"
## 
##  Shapiro-Wilk normality test
## 
## data:  df[[metric]]
## W = 0.5628, p-value < 2.2e-16
## Saving 7 x 5 in image

## [1] "UDP #packets"
## 
##  Shapiro-Wilk normality test
## 
## data:  df[[metric]]
## W = 0.26134, p-value < 2.2e-16
## Saving 7 x 5 in image

## [1] "UDP size"
## 
##  Shapiro-Wilk normality test
## 
## data:  df[[metric]]
## W = 0.16913, p-value < 2.2e-16

If p<0.05 we reject the null hypothesis that the metric is normally distributed.
Otherwise we retain the assumption of normality.

Investigate metrics per device

To see if the data shows great difference between devices we make density plots for all metrics per device

plot_density_per_device = function(df, metric, title, label) {
  return(ggplot(df, aes(x=.data[[metric]], fill=device)) +
  geom_density(alpha=0.4) +
  theme_pubr() +
  labs(title=sprintf("Density plot of %s per Device", title),
       x=label) + 
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5)))
}

plot_densities_per_device = function(df, metrics, titles, labels) {
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) & length(metrics) == length(labels))
  for(i in 1:length(metrics)) {
    print(plot_density_per_device(df, metrics[i], titles[i], labels[i]))
    ggsave(sprintf("%smetrics_per_device/%s_density_plot_per_device.png", exploratory_figures_path, metrics[i]))
  }
}

# Energy, CPU and Memory
plot_densities_per_device(data, metrics, metric_titles, metric_labels)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

# Network metrics
plot_densities_per_device(network_data, network_metrics, network_metric_titles, network_metric_labels)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

Further inspect memory

As the memory data is clearly multimodal, even when split on device, we make density plots for our different independent variables to identify other causes.

ggplot(data, aes(x=message_type, y=memory, group=message_type)) +
  facet_grid(device ~ app) +
  geom_violin(trim = FALSE, alpha = 0.5, show_guide = FALSE, aes(color=message_type)) +
  geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
  stat_summary(fun.y=mean, colour="black", geom="point",
               shape=5, size=1, show_guide = FALSE) +
  # theme_pubr() #+
  labs(title="Violin plot of Memory per group",
       x="Message type",
       y="Memory") +
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5),
        axis.text=element_text(size=8))
## Warning: The `show_guide` argument of `layer()` is deprecated as of ggplot2 2.0.0.
## ℹ Please use the `show.legend` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
## ℹ Please use the `fun` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

ggsave(sprintf("%smemory/memory_violin_box_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image

We see that memory differs a lot for Messenger, and for the Samsung device differences between Telegram and WhatsApp are also large.

Further inspect CPU

ggplot(data, aes(x=message_type, y=cpu, group=message_type)) +
  facet_grid(device ~ app) +
  geom_violin(trim = FALSE, alpha = 0.5, show_guide = FALSE, aes(color=message_type)) +
  geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
  stat_summary(fun.y=mean, colour="black", geom="point",
               shape=5, size=1, show_guide = FALSE) +
  # theme_pubr() #+
  labs(title="Violin plot of CPU per group",
       x="Message type",
       y="CPU") +
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5),
        axis.text=element_text(size=8))

ggsave(sprintf("%scpu/cpu_violin_box_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image

Further inspect network metrics

TCP is also clearly multi-modal and therefore we further inspect network metrics as well and make density plots for our different independent variables.

plot_violin_box_network_metric_facet = function(df, metric, title, label) {
  return(ggplot(df, aes(x=size, y=.data[[metric]], group=size)) +
    facet_grid(device ~ app) +
    geom_violin(trim = FALSE, alpha = 0.5, show_guide = FALSE, aes(color=size)) +
    geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
    stat_summary(fun.y=mean, colour="black", geom="point",
                 shape=5, size=1, show_guide = FALSE) +
    # theme_pubr() #+
    labs(title=sprintf("Violin plot of %s per group", title),
         # x="",
         y=label) +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5)))
}

plot_violin_box_network_metrics_facet = function(df, metrics, titles, labels) {
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) & length(metrics) == length(labels))
  for(i in 1:length(metrics)) {
    print(plot_violin_box_network_metric_facet(df, metrics[i], titles[i], labels[i]))
    ggsave(sprintf("%snetwork/per_group/%s_violin_box_plot_per_group.png", exploratory_figures_path, metrics[i]))
  }
}

plot_violin_box_network_metrics_facet(network_data, network_metrics, network_metric_titles, network_metric_labels)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

Again we see a similar pattern with Messenger being very different from Telegram and WhatsApp for TCP. For UDP we do not see such a difference. Furthermore, for all network measures we spot several extreme values.

descriptives_network_metric = function(mt) {
  return(network_data %>%
    group_by(app, size) %>%
    summarise(across(all_of(mt), .fns = 
                       list(min = min,
                            median = median,
                            mean = mean,
                            sd = sd,
                            max = max))))
}

descriptives_network_metrics = function(metrics) {
  for(metric in metrics) {
    print(descriptives_network_metric(metric))
  }
}

descriptives_network_metrics(network_metrics)
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 7
## # Groups:   app [3]
##   app       size   tcp_no_min tcp_no_median tcp_no_mean tcp_no_sd tcp_no_max
##   <fct>     <fct>       <int>         <dbl>       <dbl>     <dbl>      <int>
## 1 Telegram  small        2872         3664.       3735.      725.       6462
## 2 Telegram  medium       2730         3668.       3978.     1725.      11215
## 3 Telegram  large        3504         3762        4244.     2124.      13247
## 4 WhatsApp  small         151         2854        2735.      994.       5136
## 5 WhatsApp  medium       2745         2899        3238.     1410.       9204
## 6 WhatsApp  large        2734         2885        3601.     3146.      16959
## 7 Messenger small        1536         7764.       7589.     1823.      10167
## 8 Messenger medium       6271         7868.       8039.     2155.      16322
## 9 Messenger large        5990         7522        7641.     1083.       8959
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 7
## # Groups:   app [3]
##   app       size   tcp_size_min tcp_size_median tcp_size_mean tcp_size_sd
##   <fct>     <fct>         <dbl>           <dbl>         <dbl>       <dbl>
## 1 Telegram  small         0.725           0.938         1.02        0.472
## 2 Telegram  medium        0.801           0.972         1.26        1.35 
## 3 Telegram  large         0.947           1             1.35        1.57 
## 4 WhatsApp  small         0.021           0.67          0.680       0.367
## 5 WhatsApp  medium        0.674           0.689         0.972       1.19 
## 6 WhatsApp  large         0.708           0.735         1.36        2.74 
## 7 Messenger small         0.747           2             2.24        0.582
## 8 Messenger medium        2               2             2.6         1.57 
## 9 Messenger large         2               2             2.35        0.489
## # ℹ 1 more variable: tcp_size_max <dbl>
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 7
## # Groups:   app [3]
##   app       size   udp_no_min udp_no_median udp_no_mean udp_no_sd udp_no_max
##   <fct>     <fct>       <int>         <dbl>       <dbl>     <dbl>      <int>
## 1 Telegram  small           2          65          86.4      73.3        255
## 2 Telegram  medium          6          88.5       127.      107.         381
## 3 Telegram  large           6         140         179       153.         561
## 4 WhatsApp  small           8          62.5       129.      154.         547
## 5 WhatsApp  medium          8          40          95.2     108.         341
## 6 WhatsApp  large           8          59.5       109.      111.         380
## 7 Messenger small         134         352.        434.      285.        1442
## 8 Messenger medium        227         426        1027.     2027.        9401
## 9 Messenger large         256         463         466.      175.         792
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
## # A tibble: 9 × 7
## # Groups:   app [3]
##   app       size   udp_size_min udp_size_median udp_size_mean udp_size_sd
##   <fct>     <fct>         <dbl>           <dbl>         <dbl>       <dbl>
## 1 Telegram  small         0              0.019         0.0356      0.0419
## 2 Telegram  medium        0.001          0.0395        0.0587      0.0605
## 3 Telegram  large         0.001          0.0795        0.0998      0.106 
## 4 WhatsApp  small         0.001          0.0155        0.0564      0.0808
## 5 WhatsApp  medium        0.001          0.004         0.0418      0.0648
## 6 WhatsApp  large         0.001          0.0125        0.0444      0.0631
## 7 Messenger small         0.059          0.138         0.214       0.211 
## 8 Messenger medium        0.062          0.211         0.769       1.99  
## 9 Messenger large         0.063          0.217         0.240       0.111 
## # ℹ 1 more variable: udp_size_max <dbl>

Due to outliers, tables with descriptive statistics are more informative.

plot_violin_box_network_metric_per_app = function(df, metric, title, label) {
  return(ggplot(df, aes(x=app, y=.data[[metric]], group=app)) +
    geom_violin(trim = FALSE, alpha = 0.5, show_guide = FALSE, aes(color=app)) +
    geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
    stat_summary(fun.y=mean, colour="black", geom="point",
                 shape=5, size=1, show_guide = FALSE) +
    # theme_pubr() #+
    labs(title=sprintf("Violin plot of %s per app", title),
         # x="",
         y=label) +
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5)))
}

plot_violin_box_network_metrics_per_app = function(df, metrics, titles, labels) {
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) & length(metrics) == length(labels))
  for(i in 1:length(metrics)) {
    print(plot_violin_box_network_metric_per_app(df, metrics[i], titles[i], labels[i]))
    ggsave(sprintf("%snetwork/per_app/%s_violin_box_plot_per_app.png", exploratory_figures_path, metrics[i]))
  }
}

plot_violin_box_network_metrics_per_app(network_data, network_metrics, network_metric_titles, network_metric_labels)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

The pattern clearly returns on the aggregated level

Relationship between support metrics and energy

To be able to explain the energy consumption, we explore its relationship with the support metrics.
We will use the pearson correlation if the support metric and energy are both normally distributed. We will use the non-parametric spearman correlation otherwise.

plot_scatter = function(df, xMetric, yMetric, xTitle, yTitle, xLabel, yLabel, correlation) {
  return(ggplot(df, aes(x=.data[[xMetric]], y=.data[[yMetric]])) +
    geom_point() +
    geom_smooth(method='lm', formula=y ~ x) +
    stat_cor(method=correlation) +
    theme_pubr() +
    labs(title=sprintf("Scatterplot of %s and %s", xTitle, yTitle),
         x=xLabel,
         y=yLabel) + 
    theme(plot.title = element_text(hjust = 0.5)))
}

plot_scatters = function(df, metrics, titles, labels, correlations) {
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) & length(metrics) == length(labels) & length(metrics) == length(correlations))
  for(i in 2:length(metrics)) {
    print(plot_scatter(df, metrics[1], metrics[i], titles[1], titles[i], labels[1], labels[i], correlations[i]))
    ggsave(sprintf("%smetric_correlations/energy/%s_%s_scatter_plot.png", exploratory_figures_path, metrics[1], metrics[i]))
  }
}

# Scatter plots with CPU and Memory
correlations = c("spearman", "spearman", "spearman")
plot_scatters(data, metrics, metric_titles, metric_labels, correlations)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

# Scatter plots with Network metrics
network_correlations = c("spearman", "spearman", "spearman", "spearman", "spearman")
plot_scatters(network_data, c("energy", network_metrics), c("Energy", network_metric_titles), c("Energy (Joule)", network_metric_labels), network_correlations)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

As earlier we found that energy is non-normally distributed, we have only used Spearman’s correlation.
Energy and CPU show a clear relation, whereas the other support metrics do not.

Relationship between duration and metrics

Because the duration of the trials varies so much, we decided to see how that affects our metrics

plot_scatter(data, metrics[1], "time_sec", metric_titles[1], "Duration", metric_labels[1], "Trial duration (seconds)", "spearman")

ggsave(sprintf("%smetric_correlations/duration/%s_duration_scatter_plot.png", exploratory_figures_path, metrics[1]))
## Saving 7 x 5 in image

Again we used spearman’s correlation, among others because of the non-normality of energy consumption.
We see a very clear correlation between energy and duration which should be taken into account when interpreting the results.

plot_scatter_duration = function(df, xMetric, yMetric, xTitle, yTitle, xLabel, yLabel, correlation) {
  return(ggplot(df, aes(x=.data[[xMetric]], y=.data[[yMetric]])) +
    geom_point(aes(color=app)) +
    geom_smooth(method='lm', formula=y ~ x, color="black") +
    stat_cor(method=correlation) +
    theme_pubr() +
    labs(title=sprintf("Scatterplot of %s and %s", xTitle, yTitle),
         x=xLabel,
         y=yLabel) + 
    theme(plot.title = element_text(hjust = 0.5)))
}

plot_scatters_duration = function(df, metrics, titles, labels, correlations) {
  assert("metrics, titles and labels are of equal length", length(metrics) == length(titles) & length(metrics) == length(labels) & length(metrics) == length(correlations))
  for(i in 2:length(metrics)) {
    print(plot_scatter_duration(df, metrics[1], metrics[i], titles[1], titles[i], labels[1], labels[i], correlations[i]))
    ggsave(sprintf("%smetric_correlations/duration/duration_%s_scatter_plot.png", exploratory_figures_path, metrics[i]))
  }
}

# Scatter plots with Network metrics
network_correlations = c("spearman", "spearman", "spearman", "spearman", "spearman")
plot_scatters_duration(network_data, c("time_sec", network_metrics), c("Duration", network_metric_titles), c("Trial duration (seconds)", network_metric_labels), network_correlations)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

Because the TCP data showed clear differences between apps, we color the datapoints based on that.\ Based on that we see no clear correlation emerging between duration and the network metrics.

Comparing groups based on energy

To explore our dependent variable further we look how energy is distributed for our different independent variables

plot_violin_box = function (df, group, title, label) {
  return(ggplot(data, aes(x=.data[[group]], y=energy, fill=.data[[group]])) +
    geom_violin(trim = FALSE, alpha = 0.5, show_guide = FALSE) +
    geom_boxplot(alpha=1, color="black", width=.4, fill="white") +
    stat_summary(fun=mean, colour="black", geom="point",
                 shape=5, size=1, show_guide = FALSE) +
    theme_pubr() +
    labs(title=paste("Violin plot of Energy per", title),
         x=label,
         y="Energy (Joules)") + 
    theme(legend.title=element_blank(),
          plot.title = element_text(hjust = 0.5)))
}

plot_density_per_group = function (df, group, title, label) {
  return(ggplot(data, aes(x=energy, fill=.data[[group]])) +
  geom_density(alpha=0.4) +
  theme_pubr() +
  labs(title=paste("Density plot of Energy per", title),
       x="Energy (Joules)") + 
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5)))
}

groups = c("message_type", "app", "size")
titles = c("message type", "app", "size")
labels = c("Message type", "App", "size")

plots_per_group = function(df, groups, titles, labels) {
  assert("groups, titles and labels are of equal length", length(groups) == length(titles) & length(groups) == length(labels))
  for(i in 1:length(groups)) {
    print(plot_violin_box(data, groups[i], titles[i], labels[i]))
    ggsave(sprintf("%senergy/per_%s/energy_violin_box_plot_per_%s.png", exploratory_figures_path, groups[i], groups[i]))
    print(plot_density_per_group(data, groups[i], titles[i], labels[i]))
    ggsave(sprintf("%senergy/per_%s/energy_density_plot_per_%s.png", exploratory_figures_path, groups[i], groups[i]))
  }
}

plots_per_group(data, groups, titles, labels)
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

All

We also investigate how energy is distributed for all 45 groups (combination of message type, app and size)

ggplot(data, aes(x=message_type, y=energy, group=message_type)) +
  facet_grid(size ~ app) +
  geom_violin(trim = FALSE, alpha = 0.5, show_guide = FALSE, aes(color=message_type)) +
  geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
  stat_summary(fun.y=mean, colour="black", geom="point",
               shape=5, size=1, show_guide = FALSE) +
  # theme_pubr() #+
  labs(title="Violin plot of Energy per group",
       x="Message type",
       y="Energy (Joules)") +
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5),
        axis.text=element_text(size=8))

ggsave(sprintf("%senergy/energy_violin_box_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image

Explore effect of duration

Because duration correlated strongly with energy consumption we explore how it is different for each group too

ggplot(data, aes(x=message_type, y=time_sec, group=message_type)) +
  facet_grid(size ~ app) +
  geom_violin(trim = FALSE, alpha = 0.5, show_guide = FALSE, aes(color=message_type)) +
  geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
  stat_summary(fun.y=mean, colour="black", geom="point",
               shape=5, size=1, show_guide = FALSE) +
  # theme_pubr() #+
  labs(title="Violin plot of Duration per group",
       x="Message type",
       y="Trial duration (seconds)") +
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5),
        axis.text=element_text(size=8))

ggsave(sprintf("%sduration/duration_violin_box_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image

We find that for Messenger the duration differs greatly for the different message types.

ggplot(data[data$app=="Messenger",], aes(x=time_sec, fill=message_type)) +
  geom_density(alpha=0.4) +
  theme_pubr() +
  labs(title=paste("Density plot of Duration per message type on Messenger"),
       x="Trial duration (seconds)") +
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5))

ggsave(sprintf("%sduration/duration_density_plot_per_group.png", exploratory_figures_path))
## Saving 7 x 5 in image

Which we can also see (possibly even more clearly) in this plot

Descriptive Statistics for research questions

Now we dive into the different research questions and calculate descriptive statistics for them.

RQ1

For the first research question we check how the energy differs per message type

summary_rq1_small = data %>%
  group_by(message_type) %>%
  summarise(count = n(),
            mean = mean(energy),
            sd = sd(energy))
summary_rq1_small
## # A tibble: 5 × 4
##   message_type count  mean    sd
##   <fct>        <int> <dbl> <dbl>
## 1 text           180  110.  30.9
## 2 image          180  115.  36.3
## 3 video          180  102.  34.1
## 4 audio          180  107.  32.2
## 5 document       180  110.  34.5

and how that differs per size as well

summary_rq1_large = data %>%
  group_by(message_type, size) %>%
  summarise(count = n(),
            mean = mean(energy),
            sd = sd(energy))
## `summarise()` has grouped output by 'message_type'. You can override using the
## `.groups` argument.
summary_rq1_large
## # A tibble: 15 × 5
## # Groups:   message_type [5]
##    message_type size   count  mean    sd
##    <fct>        <fct>  <int> <dbl> <dbl>
##  1 text         small     60 112.   34.9
##  2 text         medium    60 103.   29.3
##  3 text         large     60 116.   27.0
##  4 image        small     60 111.   43.1
##  5 image        medium    60 116.   31.2
##  6 image        large     60 119.   33.7
##  7 video        small     60  96.0  37.0
##  8 video        medium    60 103.   35.4
##  9 video        large     60 109.   28.9
## 10 audio        small     60  99.8  30.5
## 11 audio        medium    60 105.   30.6
## 12 audio        large     60 117.   33.4
## 13 document     small     60  99.3  32.0
## 14 document     medium    60 114.   29.3
## 15 document     large     60 118.   39.2

Text: medium size lower mean than small and large (not for other message_types)

RQ2

For the first research question we check how the energy differs per app

summary_rq2_small = data %>%
  group_by(app) %>%
  summarise(count = n(),
            mean = mean(energy),
            sd = sd(energy))
summary_rq2_small
## # A tibble: 3 × 4
##   app       count  mean    sd
##   <fct>     <int> <dbl> <dbl>
## 1 Telegram    300  109.  33.8
## 2 WhatsApp    300  116.  31.4
## 3 Messenger   300  103.  35.1

and also how that differs for each message type

summary_rq2_large = data %>%
  group_by(app, message_type) %>%
  summarise(count = n(),
            mean = mean(energy),
            sd = sd(energy))
## `summarise()` has grouped output by 'app'. You can override using the `.groups`
## argument.
summary_rq2_large
## # A tibble: 15 × 5
## # Groups:   app [3]
##    app       message_type count  mean    sd
##    <fct>     <fct>        <int> <dbl> <dbl>
##  1 Telegram  text            60 109.   36.2
##  2 Telegram  image           60 115.   34.6
##  3 Telegram  video           60 107.   33.0
##  4 Telegram  audio           60 104.   34.2
##  5 Telegram  document        60 109.   31.1
##  6 WhatsApp  text            60 103.   26.5
##  7 WhatsApp  image           60 128.   34.8
##  8 WhatsApp  video           60 119.   26.5
##  9 WhatsApp  audio           60 115.   27.5
## 10 WhatsApp  document        60 116.   36.2
## 11 Messenger text            60 119.   27.5
## 12 Messenger image           60 104.   36.2
## 13 Messenger video           60  80.7  30.9
## 14 Messenger audio           60 104.   33.9
## 15 Messenger document        60 107.   36.0

and also how that differs per size

summary = data %>%
  group_by(app, message_type, size) %>%
  summarise(count = n(),
            mean = mean(energy),
            sd = sd(energy))
## `summarise()` has grouped output by 'app', 'message_type'. You can override
## using the `.groups` argument.
summary
## # A tibble: 45 × 6
## # Groups:   app, message_type [15]
##    app      message_type size   count  mean    sd
##    <fct>    <fct>        <fct>  <int> <dbl> <dbl>
##  1 Telegram text         small     20 118.   51.5
##  2 Telegram text         medium    20  99.3  25.9
##  3 Telegram text         large     20 109.   23.8
##  4 Telegram image        small     20 107.   41.7
##  5 Telegram image        medium    20 118.   28.3
##  6 Telegram image        large     20 119.   33.1
##  7 Telegram video        small     20 107.   38.9
##  8 Telegram video        medium    20 105.   35.0
##  9 Telegram video        large     20 110.   25.1
## 10 Telegram audio        small     20  93.2  22.1
## # ℹ 35 more rows

Hypothesis testing

We plan to use an ANOVA with 3 independent variables (size, IM app and message type)

Assumption checking

The ANOVA has several assumptions which we need to check

1. Normality

The first assumption is normality, which needs to be checked for the dependent variable and the residuals

A. Dependent variable

We check the normality of the dependent variable (energy) for every combination of groups in multiple ways:
(i) By testing for it statistically using the Shapiro-Wilk test

shapiro = data %>%
  group_by(app, message_type, size) %>%
    summarise(w.statistic = shapiro.test(energy)$statistic,
              p.value = shapiro.test(energy)$p.value) %>%
      arrange(w.statistic)
## `summarise()` has grouped output by 'app', 'message_type'. You can override
## using the `.groups` argument.
shapiro
## # A tibble: 45 × 5
## # Groups:   app, message_type [15]
##    app       message_type size  w.statistic    p.value
##    <fct>     <fct>        <fct>       <dbl>      <dbl>
##  1 Telegram  document     large       0.639 0.00000763
##  2 Messenger audio        large       0.699 0.0000379 
##  3 Telegram  video        small       0.738 0.000116  
##  4 Telegram  image        large       0.741 0.000129  
##  5 Telegram  image        small       0.743 0.000138  
##  6 Telegram  document     small       0.754 0.000194  
##  7 Telegram  text         large       0.759 0.000224  
##  8 Messenger image        large       0.772 0.000340  
##  9 Telegram  text         small       0.810 0.00124   
## 10 WhatsApp  document     large       0.819 0.00169   
## # ℹ 35 more rows

If p<0.05 we reject the null hypothesis that the dependent variable (energy) is normally distributed.
Otherwise we retain the assumption of normality.
As almost all p-values are significant, the data does not meet the assumption of normality.

  1. by inspecting the density and QQ-plots
# Function to index a grouped dataframe using a row number and column name
index_df = function(df, row, column_name) {
  return(unlist(df[row, column_name]))
}

# Function to subset the Data dataset based on values in the Shapiro dataset
subset_data = function(row) {
  subset = subset(data, app==index_df(shapiro, row, "app") & size==index_df(shapiro, row, "size" & message_type==index_df(shapiro, row, "message_type")))
  return(subset)
}

for(i in 1:nrow(shapiro)) {
  # Extract the right column values
  app_val = index_df(shapiro, i, "app")
  size_val = index_df(shapiro, i, "size")
  message_type_val = index_df(shapiro, i, "message_type")
  # Create a subset using these
  subset = subset(data, app==app_val & size==size_val & message_type==message_type_val)
  # Create a density plot
  png(sprintf("%snormality/energy_per_group/energy_density_plot_for_%s_messages_on_%s_of_%s_size.png", exploratory_figures_path, message_type_val, tolower(app_val), size_val))
  plot(density(subset$energy), main=sprintf("Density plot for %s messages on %s of %s size", message_type_val, app_val, size_val))
  dev.off()
  plot(density(subset$energy), main=sprintf("Density plot for %s messages on %s of %s size", message_type_val, app_val, size_val))
  # Create a QQ-plot
  png(sprintf("%snormality/energy_per_group/energy_qq_plot_for_%s_messages_on_%s_of_%s_size.png", exploratory_figures_path, message_type_val, app_val, size_val))
  qqPlot(subset$energy, main=sprintf("QQ-plot for %s messages on %s of size %s", message_type_val, app_val, size_val), ylab="Energy (Joules)")
  dev.off()
  qqPlot(subset$energy, main=sprintf("QQ-plot for %s messages on %s of size %s", message_type_val, app_val, size_val), ylab="Energy (Joules)")
}

Also these indicate that the assumption of normality is not met.

B. Residuals

The residuals also need to be normally distributed, which we also assess by:

res.aov = aov(energy ~ message_type * app * size, data = data)
residuals = residuals(res.aov)
  1. By testing for it statistically using the Shapiro-Wilk test
shapiro.test(residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals
## W = 0.95939, p-value = 4.252e-15

If p<0.05 we reject the null hypothesis that the residuals are normally distributed.
Otherwise we retain the assumption of normality.
Given the p-value, we have to reject the null hypothesis that the residuals are normally distributed.

  1. by inspecting the density and QQ-plot
# Create a density plot
png(sprintf("%snormality/energy_residuals_density_plot.png", exploratory_figures_path))
plot(density(residuals), main="Density plot of the Residuals")
dev.off()
## png 
##   2
plot(density(residuals), main="Density plot of the Residuals")

# Create a QQ-plot
png(sprintf("%snormality/energy_residuals_qq_plot.png", exploratory_figures_path))
qqPlot(residuals, ylab="Residuals", main="QQ-plot of the Residuals")
## [1] 608 609
dev.off()
## png 
##   2
qqPlot(residuals, ylab="Residuals", main="QQ-plot of the Residuals")

## [1] 608 609

2. Homoscedacity

We check whether the variances of each combination of groups are roughly equal

leveneTest(energy ~ message_type * app * size, data = data)
## Levene's Test for Homogeneity of Variance (center = median)
##        Df F value Pr(>F)
## group  44  0.7262 0.9079
##       855

If p<0.05 we reject the null hypothesis that all combinations of groups have equal variance.
Otherwise we retain the assumption of equal variances.\ Though homoscedacity was not violated, normality was violated for almost all groups as well as for the residuals. Therefore, we chose to do use an ART ANOVA instead.

ART ANOVA

We will measure the effect size of the significant effects using partial eta squared

art_transformed = art(energy ~ message_type * app * size, data = data)
summary(art_transformed)
## Aligned Rank Transform of Factorial Model
## 
## Call:
## art(formula = energy ~ message_type * app * size, data = data)
## 
## Column sums of aligned responses (should all be ~0):
##          message_type                   app                  size 
##                     0                     0                     0 
##      message_type:app     message_type:size              app:size 
##                     0                     0                     0 
## message_type:app:size 
##                     0 
## 
## F values of ANOVAs on aligned responses not of interest (should all be ~0):
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0
res.art = anova(art_transformed)
# Include partial eta squared as effect size measure
res.art$eta.sq.part = with(res.art, `Sum Sq`/(`Sum Sq` + `Sum Sq.res`))
res.art
## Analysis of Variance of Aligned Rank Transformed Data
## 
## Table Type: Anova Table (Type III tests) 
## Model: No Repeated Measures (lm)
## Response: art(energy)
## 
##                         Df Df.res F value     Pr(>F) eta.sq.part    
## 1 message_type           4    855  4.5240  0.0012747    0.020726  **
## 2 app                    2    855 26.1792 9.2272e-12    0.057704 ***
## 3 size                   2    855 14.8384 4.6249e-07    0.033545 ***
## 4 message_type:app       8    855 10.2722 8.7730e-14    0.087686 ***
## 5 message_type:size      8    855  1.4449  0.1738362    0.013339    
## 6 app:size               4    855  3.7666  0.0048004    0.017316  **
## 7 message_type:app:size 16    855  2.4670  0.0011235    0.044129  **
## ---
## Signif. codes:   0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Follow up tests

We will measure the effect size of the significant effects between levels using Cliff’s Delta

RQ1

# Comparing differences per message_type (RQ1)
# We leave out size as message_type:size is non-significant
comparisons_message_type = summary(art.con(art_transformed, "message_type", adjust="BH"))
## NOTE: Results may be misleading due to involvement in interactions
# comparisons_message_type
c_delta_message_types = c()
# For all combinations of message_types
for(i in 1:(n_message_types-1)) {
  for(j in (i+1):n_message_types) {
    mt_i = message_types[i]
    mt_j = message_types[j]
    # Calculate Cliff's Delta
    c_delta = cliff.delta(data[data$message_type==mt_i,]$energy, data[data$message_type==mt_j,]$energy)
    # Save Cliff's Delta
    c_delta_message_types = c(c_delta_message_types, c_delta$estimate)
  }
}
comparisons_message_type$c.delta = c_delta_message_types
print(comparisons_message_type)
##  contrast         estimate   SE  df t.ratio p.value c.delta
##  text - image       -36.95 27.5 855  -1.344  0.2987 -0.0633
##  text - video        75.80 27.5 855   2.757  0.0198  0.1319
##  text - audio        17.07 27.5 855   0.621  0.5943  0.0586
##  text - document     -2.47 27.5 855  -0.090  0.9284 -0.0155
##  image - video      112.75 27.5 855   4.102  0.0004  0.1946
##  image - audio       54.02 27.5 855   1.965  0.0995  0.1251
##  image - document    34.48 27.5 855   1.254  0.3001  0.0715
##  video - audio      -58.73 27.5 855  -2.137  0.0823 -0.0755
##  video - document   -78.27 27.5 855  -2.847  0.0198 -0.1283
##  audio - document   -19.54 27.5 855  -0.711  0.5943 -0.0643
## 
## Results are averaged over the levels of: app, size 
## P value adjustment: BH method for 10 tests

Only text - video, video - image, video - document are significant

RQ2

For the main research questions, we will first compare the differences between the apps only

# Comparing differences between apps (RQ2)
comparisons_app = summary(art.con(art_transformed, "app", adjust="BH"))
## NOTE: Results may be misleading due to involvement in interactions
# comparisons_app
c_delta_apps = c()
# For all combinations of message_types
for(i in 1:(n_apps-1)) {
  for(j in (i+1):n_apps) {
    app_i = apps[i]
    app_j = apps[j]
    # Calculate Cliff's Delta
    c_delta = cliff.delta(data[data$app==app_i,]$energy, data[data$app==app_j,]$energy)
    # Save Cliff's Delta
    c_delta_apps = c(c_delta_apps, c_delta$estimate)
  }
}
comparisons_app$c.delta = c_delta_apps
print(comparisons_app)
##  contrast             estimate SE  df t.ratio p.value c.delta
##  Telegram - WhatsApp    -108.8 21 855  -5.183  <.0001 -0.2363
##  Telegram - Messenger     37.4 21 855   1.781  0.0753  0.0758
##  WhatsApp - Messenger    146.2 21 855   6.964  <.0001  0.2645
## 
## Results are averaged over the levels of: message_type, size 
## P value adjustment: BH method for 3 tests

WhatsApp is significantly worse than the others, there is no significant difference between Telegram and Messenger

Then we (as the interaction between app and size is significant) will also look at how the size plays a role

# Comparing differences between apps per size (RQ2)
comparisons_app_size = summary(art.con(art_transformed, "app:size", adjust="none"))
## NOTE: Results may be misleading due to involvement in interactions
# Select columns of interest
cols_comparisons_app_size = which(str_detect(comparisons_app_size$contrast , ".*small.*small.*|.*medium.*medium.*|.*large.*large.*"))
comparisons_app_size = comparisons_app_size[cols_comparisons_app_size, ]
# Correct p-values
comparisons_app_size$p.value = p.adjust(comparisons_app_size$p.value, method = "BH")
c_delta_app_size = c()
# For all combinations of message_types
for(size in sort(sizes)) {
  for(i in 1:(n_apps-1)) {
    for(j in (i+1):n_apps) {
      app_i = sort(apps)[i]
      app_j = sort(apps)[j]
      # Calculate Cliff's Delta
      c_delta = cliff.delta(data[data$size==size & data$app==app_i,]$energy, data[data$size==size & data$app==app_j,]$energy)
      # Save Cliff's Delta
      c_delta_app_size = c(c_delta_app_size, c_delta$estimate)
    }
  }
}
# print(c_delta_message_types)
comparisons_app_size$c.delta = c_delta_app_size
print(comparisons_app_size)
##  contrast                           estimate   SE  df t.ratio p.value c.delta
##  Messenger,large - Telegram,large      -38.5 35.6 855  -1.082  0.3147 -0.0614
##  Messenger,large - WhatsApp,large      -89.5 35.6 855  -2.515  0.0181 -0.1804
##  Messenger,medium - Telegram,medium     30.4 35.6 855   0.855  0.3926 -0.1508
##  Messenger,medium - WhatsApp,medium   -112.4 35.6 855  -3.158  0.0037  0.0438
##  Messenger,small - Telegram,small     -105.1 35.6 855  -2.953  0.0058 -0.2226
##  Messenger,small - WhatsApp,small     -234.7 35.6 855  -6.594  <.0001 -0.3250
##  Telegram,large - WhatsApp,large       -51.0 35.6 855  -1.433  0.1957 -0.2154
##  Telegram,medium - WhatsApp,medium    -142.9 35.6 855  -4.013  0.0003 -0.3860
##  Telegram,small - WhatsApp,small      -129.6 35.6 855  -3.640  0.0009 -0.2354
## 
## Results are averaged over the levels of: message_type

For the subquestions we will also take into account the message type

First we will disregard the size and see if we can find differences between apps per message type

# Comparing differences between apps per message_type (RQ2)
comparisons_message_type_app = summary(art.con(art_transformed, "message_type:app", adjust="none"))
## NOTE: Results may be misleading due to involvement in interactions
# Select columns of interest
cols_comparisons_message_type_app = which(str_detect(comparisons_message_type_app$contrast , "^text.*text.*|^image.*image.*|^audio.*audio.*|^video.*video.*|^document.*document.*"))
comparisons_message_type_app = comparisons_message_type_app[cols_comparisons_message_type_app, ]
# Correct p-values
comparisons_message_type_app$p.value = p.adjust(comparisons_message_type_app$p.value, method = "BH")
# comparisons_message_type_app
c_delta_message_types_apps = c()
# For all combinations of message_types
for(mt in sort(message_types)) {
  for(i in 1:(n_apps-1)) {
    for(j in (i+1):n_apps) {
      app_i = sort(apps)[i]
      app_j = sort(apps)[j]
      # Calculate Cliff's Delta
      c_delta = cliff.delta(data[data$message_type==mt & data$app==app_i,]$energy, data[data$message_type==mt & data$app==app_j,]$energy)
      # Save Cliff's Delta
      c_delta_message_types_apps = c(c_delta_message_types_apps, c_delta$estimate)
    }
  }
}
# print(c_delta_message_types)
comparisons_message_type_app$c.delta = c_delta_message_types_apps
print(comparisons_message_type_app)
##  contrast                               estimate   SE  df t.ratio p.value
##  audio,Messenger - audio,Telegram          -38.2 45.1 855  -0.847  0.4584
##  audio,Messenger - audio,WhatsApp         -142.2 45.1 855  -3.150  0.0045
##  audio,Telegram - audio,WhatsApp          -104.0 45.1 855  -2.303  0.0359
##  document,Messenger - document,Telegram     28.2 45.1 855   0.624  0.5707
##  document,Messenger - document,WhatsApp    -73.3 45.1 855  -1.623  0.1312
##  document,Telegram - document,WhatsApp    -101.5 45.1 855  -2.247  0.0373
##  image,Messenger - image,Telegram          -76.5 45.1 855  -1.694  0.1235
##  image,Messenger - image,WhatsApp         -227.6 45.1 855  -5.042  <.0001
##  image,Telegram - image,WhatsApp          -151.1 45.1 855  -3.347  0.0032
##  text,Messenger - text,Telegram            131.5 45.1 855   2.913  0.0069
##  text,Messenger - text,WhatsApp            140.9 45.1 855   3.122  0.0045
##  text,Telegram - text,WhatsApp               9.4 45.1 855   0.208  0.8351
##  video,Messenger - video,Telegram         -211.9 45.1 855  -4.694  <.0001
##  video,Messenger - video,WhatsApp         -351.2 45.1 855  -7.780  <.0001
##  video,Telegram - video,WhatsApp          -139.3 45.1 855  -3.086  0.0045
##  c.delta
##  -0.0772
##  -0.3100
##  -0.3161
##   0.0139
##  -0.1400
##  -0.2611
##  -0.1811
##  -0.4267
##  -0.2744
##   0.3783
##   0.3606
##  -0.0461
##  -0.4739
##  -0.6656
##  -0.3061
## 
## Results are averaged over the levels of: size
plot_violin_box_message_type = function(df, mt) {
 return(ggplot(df[df$message_type==mt,], aes(x=app, y=energy, group=app)) +
  geom_violin(trim = FALSE, alpha = 0.5, show_guide = FALSE, aes(color=app)) +
  geom_boxplot(alpha=1, color="black", width=.3, fill="white") +
  stat_summary(fun.y=mean, colour="black", geom="point",
               shape=5, size=1, show_guide = FALSE) +
  # theme_pubr() #+
  labs(title=sprintf("Violin plot of Energy of %s messages per app", mt),
       # x="",
       y="Energy (Joules)") +
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5)))
}

plot_density_message_type = function(df, mt) {
  return(ggplot(df[df$message_type==mt,], aes(x=energy, fill=app)) +
  geom_density(alpha=0.4) +
  theme_pubr() +
  labs(title=sprintf("Density plot of Energy of %s messages per app", mt),
       x="Energy (Joules)") + 
  theme(legend.title=element_blank(),
        plot.title = element_text(hjust = 0.5)))
}

plot_message_type = function(df, mts) {
  for(mt in mts) {
    print(plot_violin_box_message_type(df, mt))
    ggsave(sprintf("%senergy/per_app/message_type/energy_violin_box_plot_for_%s_messages_per_app.png", exploratory_figures_path, mt))
    print(plot_density_message_type(df, mt))
    ggsave(sprintf("%senergy/per_app/message_type/energy_density_plot_for_%s_messages_per_app.png", exploratory_figures_path, mt))
  }
}

plot_message_type(data, c("audio", "document", "image", "text", "video"))
## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

## Saving 7 x 5 in image

For documents we can only say that Telegram is significantly more energy efficient than WhatsApp.
For audio we can only say that Messenger is significantly more energy efficient than WhatsApp.
For images we can only say that Telegram and Messenger are significantly more energy efficient than WhatsApp.
For text we can say that Messenger is significantly more efficient than WhatsApp and Telegram, but we cannot differentiate between WhatsApp and Telegram in terms of energy consumption.
For video we can say that Messenger is significantly more energy efficient than Telegram which is is significantly more energy efficient than WhatsApp (and Messenger is (thus) also significantly more energy efficient than WhatsApp).

Then we will also include size to see if we still can make claims at the most detailed level

# Comparing differences between apps per message_type and size (RQ2)
comparisons_message_type_app_size = summary(art.con(art_transformed, "message_type:app:size", adjust="none"))
# Select columns of interest
cols_comparisons_message_type_app_size = which(str_detect(comparisons_message_type_app_size$contrast , "(?=^text.*text.*|^image.*image.*|^audio.*audio.*|^video.*video.*|^document.*document.*)(?=.*small.*small$|.*medium.*medium$|.*large.*large$)"))
comparisons_message_type_app_size = comparisons_message_type_app_size[cols_comparisons_message_type_app_size, ]
# Correct p-values
comparisons_message_type_app_size$p.value = p.adjust(comparisons_message_type_app_size$p.value, method= "BH")
# comparisons_message_type_app_size
c_delta_message_types_apps_sizes = c()
# For all combinations of message_types
for(size in sort(sizes)) {
  for(mt in sort(message_types)) {
    for(i in 1:(n_apps-1)) {
      for(j in (i+1):n_apps) {
        app_i = sort(apps)[i]
        app_j = sort(apps)[j]
        # Calculate Cliff's Delta
        c_delta = cliff.delta(data[data$size==size & data$message_type==mt & data$app==app_i,]$energy, data[data$size==size & data$message_type==mt & data$app==app_j,]$energy)
        # Save Cliff's Delta
        c_delta_message_types_apps_sizes = c(c_delta_message_types_apps_sizes, c_delta$estimate)
      }
    }
  }
}
# print(c_delta_message_types)
comparisons_message_type_app_size$c.delta = c_delta_message_types_apps_sizes
print(comparisons_message_type_app_size)
##  contrast                                             estimate   SE  df t.ratio
##  audio,Messenger,large - audio,Telegram,large          -222.80 75.1 855  -2.965
##  audio,Messenger,large - audio,WhatsApp,large          -199.90 75.1 855  -2.660
##  audio,Messenger,medium - audio,Telegram,medium         127.25 75.1 855   1.694
##  audio,Messenger,medium - audio,WhatsApp,medium        -144.90 75.1 855  -1.928
##  audio,Messenger,small - audio,Telegram,small            36.95 75.1 855   0.492
##  audio,Messenger,small - audio,WhatsApp,small          -105.45 75.1 855  -1.403
##  audio,Telegram,large - audio,WhatsApp,large             22.90 75.1 855   0.305
##  audio,Telegram,medium - audio,WhatsApp,medium         -272.15 75.1 855  -3.622
##  audio,Telegram,small - audio,WhatsApp,small           -142.40 75.1 855  -1.895
##  document,Messenger,large - document,Telegram,large      -5.00 75.1 855  -0.067
##  document,Messenger,large - document,WhatsApp,large     -44.75 75.1 855  -0.596
##  document,Messenger,medium - document,Telegram,medium   222.90 75.1 855   2.967
##  document,Messenger,medium - document,WhatsApp,medium    60.55 75.1 855   0.806
##  document,Messenger,small - document,Telegram,small    -192.75 75.1 855  -2.565
##  document,Messenger,small - document,WhatsApp,small    -259.30 75.1 855  -3.451
##  document,Telegram,large - document,WhatsApp,large      -39.75 75.1 855  -0.529
##  document,Telegram,medium - document,WhatsApp,medium   -162.35 75.1 855  -2.161
##  document,Telegram,small - document,WhatsApp,small      -66.55 75.1 855  -0.886
##  image,Messenger,large - image,Telegram,large             4.95 75.1 855   0.066
##  image,Messenger,large - image,WhatsApp,large           -64.30 75.1 855  -0.856
##  image,Messenger,medium - image,Telegram,medium        -135.25 75.1 855  -1.800
##  image,Messenger,medium - image,WhatsApp,medium        -215.55 75.1 855  -2.869
##  image,Messenger,small - image,Telegram,small           -89.05 75.1 855  -1.185
##  image,Messenger,small - image,WhatsApp,small          -358.65 75.1 855  -4.773
##  image,Telegram,large - image,WhatsApp,large            -69.25 75.1 855  -0.922
##  image,Telegram,medium - image,WhatsApp,medium          -80.30 75.1 855  -1.069
##  image,Telegram,small - image,WhatsApp,small           -269.60 75.1 855  -3.588
##  text,Messenger,large - text,Telegram,large             121.70 75.1 855   1.620
##  text,Messenger,large - text,WhatsApp,large              64.30 75.1 855   0.856
##  text,Messenger,medium - text,Telegram,medium           153.30 75.1 855   2.040
##  text,Messenger,medium - text,WhatsApp,medium           152.25 75.1 855   2.026
##  text,Messenger,small - text,Telegram,small             149.70 75.1 855   1.992
##  text,Messenger,small - text,WhatsApp,small             212.20 75.1 855   2.824
##  text,Telegram,large - text,WhatsApp,large              -57.40 75.1 855  -0.764
##  text,Telegram,medium - text,WhatsApp,medium             -1.05 75.1 855  -0.014
##  text,Telegram,small - text,WhatsApp,small               62.50 75.1 855   0.832
##  video,Messenger,large - video,Telegram,large           -93.70 75.1 855  -1.247
##  video,Messenger,large - video,WhatsApp,large          -175.25 75.1 855  -2.332
##  video,Messenger,medium - video,Telegram,medium        -228.95 75.1 855  -3.047
##  video,Messenger,medium - video,WhatsApp,medium        -390.55 75.1 855  -5.198
##  video,Messenger,small - video,Telegram,small          -253.75 75.1 855  -3.377
##  video,Messenger,small - video,WhatsApp,small          -423.00 75.1 855  -5.630
##  video,Telegram,large - video,WhatsApp,large            -81.55 75.1 855  -1.085
##  video,Telegram,medium - video,WhatsApp,medium         -161.60 75.1 855  -2.151
##  video,Telegram,small - video,WhatsApp,small           -169.25 75.1 855  -2.253
##  p.value c.delta
##   0.0140  -0.520
##   0.0275  -0.490
##   0.1633   0.080
##   0.1107   0.140
##   0.6838   0.020
##   0.2681  -0.260
##   0.8149   0.120
##   0.0032  -0.270
##   0.1143  -0.220
##   0.9690   0.360
##   0.6365   0.200
##   0.0140  -0.200
##   0.5115  -0.215
##   0.0337  -0.370
##   0.0044  -0.185
##   0.6715   0.165
##   0.0794  -0.280
##   0.5045  -0.635
##   0.9690   0.590
##   0.5045   0.050
##   0.1354  -0.400
##   0.0173  -0.405
##   0.3667  -0.400
##   <.0001  -0.095
##   0.5020   0.465
##   0.4144   0.390
##   0.0032  -0.080
##   0.1829  -0.525
##   0.5045  -0.680
##   0.0969  -0.345
##   0.0969   0.065
##   0.1000  -0.185
##   0.0182  -0.335
##   0.5271  -0.605
##   0.9889  -0.565
##   0.5072  -0.140
##   0.3419  -0.225
##   0.0597  -0.580
##   0.0134  -0.475
##   <.0001   0.380
##   0.0049   0.565
##   <.0001  -0.005
##   0.4144  -0.745
##   0.0794  -0.885
##   0.0690  -0.385